#include	"scalelin.h"

// Byte-order helpers used by the routines below that interpret pixel words numerically.
// With LITTLE_ENDIAN set, the argument is replaced by REVERSEu16()/REVERSEu32() of itself;
// otherwise the macros expand to nothing and the word is used as stored.
// NOTE(review): LITTLE_ENDIAN and REVERSEu16/REVERSEu32 are not defined in this file -
// presumably they come from scalelin.h. Confirm that LITTLE_ENDIAN is 0 or undefined on
// big-endian builds (some system headers define it as a non-zero constant on every target).
#if LITTLE_ENDIAN

// Both macros expand to a bare assignment (no parentheses, no do/while wrapper), so <value>
// has to be a plain lvalue and the macro has to be used as a statement of its own.
#define	INTEL_SWAP16( value )	value = REVERSEu16( value )
#define	INTEL_SWAP32( value )	value = REVERSEu32( value )

#else

// Nothing to do: the macro invocation leaves only its trailing ';' behind.
#define	INTEL_SWAP16( value )
#define	INTEL_SWAP32( value )

#endif

//----------------------------------------------------------------------------------------
// Divisor to shift count table
//----------------------------------------------------------------------------------------
// Indexed with (number of summed source pixels - 1).
// An entry >= 0 is the right-shift that divides by that count (the count is a power of
// two: 1, 2, 4, ... 256). -1 marks every other count; the caller then has to divide.
// The element type is spelled "signed char" so that the -1 markers stay negative even with
// a compiler that treats plain char as unsigned. The table is read-only, hence const.
static const signed char	cnt_to_shifts[256] =
{
	// counts 1 - 16
	 0, 1,-1, 2,  -1,-1,-1, 3,  -1,-1,-1,-1,  -1,-1,-1, 4,
	// counts 17 - 32
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1, 5,
	// counts 33 - 64
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1, 6,
	// counts 65 - 128
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1, 7,
	// counts 129 - 256
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,
	-1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1,-1,  -1,-1,-1, 8
};

//----------------------------------------------------------------------------------------
// Grow a line (1 - 8 bits)
// Function result:		-
// e:									negative destination width = - dw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	grow_line_px1_8( LONG e, LONG sw, LONG dw, LONG cnt,
											 LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
											 ULONG *src, ULONG *dst )
{
	// Enlarges one line of packed pixels by repeating source pixels.
	// Both lines are processed as 32-bit words (after INTEL_SWAP32): the pixel to read next
	// sits in the most significant bits of src_word, finished pixels are shifted into
	// dst_word from the right. Destination bits to the left of the first and to the right of
	// the last written pixel are preserved.
	// NOTE(review): the word-boundary tests compare the bit counters with 0, so this
	// presumably needs <bits> to divide 32, offsets that are multiples of <bits> and a
	// 32-bit ULONG - confirm against the callers and scalelin.h.
	LONG		shift_bits;
	ULONG	src_word;
	ULONG	dst_word;

	if ( cnt < 0 )	// no output pixel: leave both lines untouched
		return;

	shift_bits = 32 - bits;	// # of shifts to the right that isolate the first pixel

	src += src_bit_offset >> 5;	// skip whole words, keep the offset inside the word
	dst += dst_bit_offset >> 5;
	src_bit_offset &= 31;
	dst_bit_offset &= 31;

	src_word = *src++;
	INTEL_SWAP32( src_word );
	src_word <<= src_bit_offset;	// first pixel is now left-aligned

	dst_word = *dst;
	INTEL_SWAP32( dst_word );

	// Keep only the bits left of the first pixel. With an offset of 0 nothing is kept; the
	// shift count would be 32, which is undefined for a 32-bit word, so clear explicitly.
	if ( dst_bit_offset )
		dst_word >>= ( 32 - dst_bit_offset );
	else
		dst_word = 0;

	src_bit_offset -= 32;	// (negative) # of bits that have to be read
	dst_bit_offset -= 32;	// (negative) # of bits that have to be written

	while ( cnt >= 0 )
	{
		dst_word <<= bits;
		dst_word |= src_word >> shift_bits;	// insert source pixel
		dst_bit_offset += bits;	// next destination bit offset

		if ( dst_bit_offset == 0 )	// destination word full?
		{
			INTEL_SWAP32( dst_word );
			*dst++ = dst_word;
			dst_bit_offset = -32;
		}

		e += sw;	// add source width to the discriminator

		if ( e >= 0 )	// step to the next source pixel?
		{
			e -= dw;	// subtract destination width from the discriminator

			src_word <<= bits;	// next source pixel
			src_bit_offset += bits;	// next source bit offset

			// Fetch a new source word only while another pixel is going to be output;
			// after the last pixel the fetch would read a word that is never needed and
			// that may lie behind the end of the source line.
			if (( src_bit_offset == 0 ) && ( cnt > 0 ))
			{
				src_word = *src++;
				INTEL_SWAP32( src_word );
				src_bit_offset = -32;
			}
		}
		cnt--;
	}

	if ( dst_bit_offset > -32 )	// write the last (partially filled) destination word?
	{
		ULONG	mask;

		dst_bit_offset = - dst_bit_offset;	// # of bits right of the last pixel
		dst_word <<= dst_bit_offset;	// left-align the written pixels
		mask = ((ULONG) 1 << dst_bit_offset ) - 1;	// unsigned: 1L << 31 would overflow a 32-bit long
		INTEL_SWAP32( dst_word );
		INTEL_SWAP32( mask );
		dst_word |= *dst & mask;	// keep the old bits right of the last pixel
		*dst = dst_word;
	}
}

//----------------------------------------------------------------------------------------
// Grow a line (16 bits)
// Function result:		-
// e:									negative destination width = - dw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	grow_line_px16( LONG e, LONG sw, LONG dw, LONG cnt,
											LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
											UWORD *src, UWORD *dst )
{
	// Enlarges one line of 16-bit pixels: every output pixel is a copy of the current source
	// pixel and the discriminator <e> decides when to step to the next source pixel.
	// Pixels are copied unmodified, so no byte swapping takes place.
	UWORD	src_word;

	(void) bits;	// unused, the pixel size is fixed

	if ( cnt < 0 )	// no output pixel: do not touch either line
		return;

	// the bit offsets are applied as byte offsets
	src = (UWORD *) ((UCHAR *) src + ( src_bit_offset >> 3 ));
	dst = (UWORD *) ((UCHAR *) dst + ( dst_bit_offset >> 3 ));

	src_word = *src++;

	while ( cnt >= 0 )
	{
		*dst++ = src_word;

		e += sw;	// add source width to the discriminator

		if ( e >= 0 )	// step to the next source pixel?
		{
			e -= dw;	// subtract destination width from the discriminator

			// Fetch the next source pixel only if it is going to be output; after the last
			// output pixel the fetch would read behind the pixels that are needed.
			if ( cnt > 0 )
				src_word = *src++;
		}
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Grow a line (24 bits)
// Function result:		-
// e:									negative destination width = - dw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	grow_line_px24( LONG e, LONG sw, LONG dw, LONG cnt,
											LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
											UCHAR *src, UCHAR *dst )
{
	// Enlarges one line of 3-byte pixels: every output pixel is a byte-wise copy of the
	// current source pixel and the discriminator <e> decides when to step to the next one.
	UCHAR	src_c0;
	UCHAR	src_c1;
	UCHAR	src_c2;

	(void) bits;	// unused, the pixel size is fixed

	if ( cnt < 0 )	// no output pixel: do not touch either line
		return;

	src += src_bit_offset >> 3;	// bit offsets -> byte offsets
	dst += dst_bit_offset >> 3;

	src_c0 = *src++;
	src_c1 = *src++;
	src_c2 = *src++;

	while ( cnt >= 0 )
	{
		*dst++ = src_c0;
		*dst++ = src_c1;
		*dst++ = src_c2;

		e += sw;	// add source width to the discriminator

		if ( e >= 0 )	// step to the next source pixel?
		{
			e -= dw;	// subtract destination width from the discriminator

			// Fetch the next source pixel only if it is going to be output; after the last
			// output pixel the fetch would read three bytes behind the pixels that are needed.
			if ( cnt > 0 )
			{
				src_c0 = *src++;
				src_c1 = *src++;
				src_c2 = *src++;
			}
		}
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Grow a line (32 bits)
// Function result:		-
// e:									negative destination width = - dw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	grow_line_px32( LONG e, LONG sw, LONG dw, LONG cnt,
											LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
											ULONG *src, ULONG *dst )
{
	// Enlarges one line of 32-bit pixels: every output pixel is a copy of the current source
	// pixel and the discriminator <e> decides when to step to the next source pixel.
	// Pixels are copied unmodified, so no byte swapping takes place.
	ULONG	src_word;

	(void) bits;	// unused, the pixel size is fixed

	if ( cnt < 0 )	// no output pixel: do not touch either line
		return;

	src += src_bit_offset >> 5;	// bit offsets -> word offsets
	dst += dst_bit_offset >> 5;

	src_word = *src++;

	while ( cnt >= 0 )
	{
		*dst++ = src_word;

		e += sw;	// add source width to the discriminator

		if ( e >= 0 )	// step to the next source pixel?
		{
			e -= dw;	// subtract destination width from the discriminator

			// Fetch the next source pixel only if it is going to be output; after the last
			// output pixel the fetch would read behind the pixels that are needed.
			if ( cnt > 0 )
				src_word = *src++;
		}
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (1 - 8 bits)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_px1_8( LONG e, LONG sw, LONG dw, LONG cnt,
												 LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
												 ULONG *src, ULONG *dst )
{
	// Reduces one line of packed pixels. Each output pixel is the largest value among the
	// source pixels collected for it (0 if none was collected).
	// Both lines are processed as 32-bit words (after INTEL_SWAP32): the pixel to read next
	// sits in the most significant bits of src_word, finished pixels are shifted into
	// dst_word from the right. Destination bits to the left of the first and to the right of
	// the last written pixel are preserved.
	// NOTE(review): the word-boundary tests compare the bit counters with 0, so this
	// presumably needs <bits> to divide 32, offsets that are multiples of <bits> and a
	// 32-bit ULONG - confirm against the callers and scalelin.h.
	LONG		shift_bits;
	ULONG	src_word;
	ULONG	dst_word;

	if ( cnt < 0 )	// no output pixel: leave both lines untouched
		return;

	shift_bits = 32 - bits;	// # of shifts to the right that isolate the first pixel

	src += src_bit_offset >> 5;	// skip whole words, keep the offset inside the word
	dst += dst_bit_offset >> 5;
	src_bit_offset &= 31;
	dst_bit_offset &= 31;

	src_word = *src++;
	INTEL_SWAP32( src_word );
	src_word <<= src_bit_offset;	// first pixel is now left-aligned

	dst_word = *dst;
	INTEL_SWAP32( dst_word );

	// Keep only the bits left of the first pixel. With an offset of 0 nothing is kept; the
	// shift count would be 32, which is undefined for a 32-bit word, so clear explicitly.
	if ( dst_bit_offset )
		dst_word >>= ( 32 - dst_bit_offset );
	else
		dst_word = 0;

	src_bit_offset -= 32;	// (negative) # of bits that have to be read
	dst_bit_offset -= 32;	// (negative) # of bits that have to be written

	while ( cnt >= 0 )
	{
		ULONG	src_value;

		src_value = 0;

		while ( e < 0 )	// read another source pixel?
		{
			ULONG	tmp;

			if ( src_bit_offset == 0 )	// source word used up: load the next one on demand
			{
				src_word = *src++;
				INTEL_SWAP32( src_word );
				src_bit_offset = -32;
			}

			tmp = src_word >> shift_bits;	// source pixel
			if ( tmp > src_value )	// keep the maximum
				src_value = tmp;

			src_word <<= bits;
			src_bit_offset += bits;

			e += dw;	// add destination width to the discriminator
		}

		dst_word <<= bits;
		dst_word |= src_value;	// insert the resulting pixel
		dst_bit_offset += bits;

		if ( dst_bit_offset == 0 )	// destination word full?
		{
			INTEL_SWAP32( dst_word );
			*dst++ = dst_word;
			dst_bit_offset = -32;
		}

		e -= sw;	// subtract source width from the discriminator

		cnt--;
	}

	if ( dst_bit_offset > -32 )	// write the last (partially filled) destination word?
	{
		ULONG	mask;

		dst_bit_offset = - dst_bit_offset;	// # of bits right of the last pixel
		dst_word <<= dst_bit_offset;	// left-align the written pixels
		mask = ((ULONG) 1 << dst_bit_offset ) - 1;	// unsigned: 1L << 31 would overflow a 32-bit long
		INTEL_SWAP32( dst_word );
		INTEL_SWAP32( mask );
		dst_word |= *dst & mask;	// keep the old bits right of the last pixel
		*dst = dst_word;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (8 bits)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_A8( LONG e, LONG sw, LONG dw, LONG cnt,
											LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
											UCHAR *src, UCHAR *dst )
{
	// Reduces one line of 8-bit pixels. Each output pixel is the (truncated) average of the
	// source pixels collected for it.
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 3;	// bit offsets -> byte offsets
	dst += dst_bit_offset >> 3;

	while ( cnt >= 0 )
	{
		LONG		src_cnt;
		ULONG	src_gray;

		src_cnt = 0;
		src_gray = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			src_gray += *src++;
			src_cnt++;

			e += dw;
		}

		// Without a collected pixel (e was not negative) there is nothing to average: the
		// sum stays 0 and neither the table lookup at index -1 nor a division by 0 happens.
		if ( src_cnt > 0 )
		{
			LONG	shifts;

			shifts = -1;
			if ( src_cnt <= 256 )	// the table covers the counts 1 - 256
				shifts = cnt_to_shifts[src_cnt - 1];

			if ( shifts >= 0 )	// can we shift?
				src_gray >>= shifts;
			else	// we have to divide
				src_gray /= src_cnt;
		}

		*dst++ = (UCHAR) src_gray;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (16 bits, 3 components of 5 bits each)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_xABC15( LONG e, LONG sw, LONG dw, LONG cnt,
												  LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
												  UWORD *src, UWORD *dst )
{
	// Reduces one line of 16-bit pixels with three 5-bit components (bits 14-10, 9-5 and 4-0
	// of the word after INTEL_SWAP16). Each output pixel holds the (truncated) per-component
	// average of the source pixels collected for it; bit 15 of the output is always 0.
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 4;	// bit offsets -> pixel offsets
	dst += dst_bit_offset >> 4;

	while ( cnt >= 0 )
	{
		ULONG	src_word;
		ULONG	dst_word;
		LONG		src_cnt;
		ULONG	src_red;
		ULONG	src_green;
		ULONG	src_blue;

		src_cnt = 0;
		src_red = 0;
		src_green = 0;
		src_blue = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			src_word = *src++;
			INTEL_SWAP16( src_word );

			src_cnt++;
			src_red += ( src_word >> 10 ) & 0x001fL;
			src_green += ( src_word >> 5 ) & 0x001fL;
			src_blue += src_word & 0x001fL;

			e += dw;
		}

		// Without a collected pixel (e was not negative) there is nothing to average: the
		// sums stay 0 and neither the table lookup at index -1 nor a division by 0 happens.
		if ( src_cnt > 0 )
		{
			LONG	shifts;

			shifts = -1;
			if ( src_cnt <= 256 )	// the table covers the counts 1 - 256
				shifts = cnt_to_shifts[src_cnt - 1];

			if ( shifts >= 0 )	// can we shift?
			{
				src_red >>= shifts;
				src_green >>= shifts;
				src_blue >>= shifts;
			}
			else	// we have to divide
			{
				src_red /= src_cnt;
				src_green /= src_cnt;
				src_blue /= src_cnt;
			}
		}

		dst_word = ( src_red << 10 ) | ( src_green << 5 ) | src_blue;
		INTEL_SWAP16( dst_word );
		*dst++ = (UWORD) dst_word;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (16 bits, 2 independent components of 8 bits each)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_AB16( LONG e, LONG sw, LONG dw, LONG cnt,
												LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
												UWORD *src, UWORD *dst )
{
	// Reduces one line of 16-bit pixels with two 8-bit components (high and low byte of the
	// word after INTEL_SWAP16). Both components are averaged independently of each other
	// over the source pixels collected for an output pixel (truncating).
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 4;	// bit offsets -> pixel offsets
	dst += dst_bit_offset >> 4;

	while ( cnt >= 0 )
	{
		ULONG	src_word;
		ULONG	dst_word;
		LONG	src_cnt;
		ULONG	src_a;
		ULONG	src_b;

		src_cnt = 0;
		src_a = 0;
		src_b = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			src_word = *src++;
			INTEL_SWAP16( src_word );

			src_cnt++;
			src_a += ( src_word >> 8 ) & 0x00ffL;
			src_b += src_word & 0x00ffL;

			e += dw;
		}

		// Without a collected pixel (e was not negative) there is nothing to average: the
		// sums stay 0 and neither the table lookup at index -1 nor a division by 0 happens.
		if ( src_cnt > 0 )
		{
			LONG	shifts;

			shifts = -1;
			if ( src_cnt <= 256 )	// the table covers the counts 1 - 256
				shifts = cnt_to_shifts[src_cnt - 1];

			if ( shifts >= 0 )	// can we shift?
			{
				src_a >>= shifts;
				src_b >>= shifts;
			}
			else	// we have to divide
			{
				src_a /= src_cnt;
				src_b /= src_cnt;
			}
		}

		dst_word = ( src_a << 8 ) | src_b;
		INTEL_SWAP16( dst_word );
		*dst++ = (UWORD) dst_word;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (16 bits, 2 components of 8 bits each, first one is alpha)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_AG16( LONG e, LONG sw, LONG dw, LONG cnt,
												LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
												UWORD *src, UWORD *dst )
{
	// Reduces one line of 16-bit pixels made of alpha (high byte) and gray (low byte of the
	// word after INTEL_SWAP16). The output alpha is the plain average of the collected
	// alphas; the output gray is the average of the grays weighted with their alpha.
	// If the alpha sum is 0 the whole output pixel is 0.
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 4;	// bit offsets -> pixel offsets
	dst += dst_bit_offset >> 4;

	while ( cnt >= 0 )
	{
		ULONG	src_word;
		ULONG	dst_word;
		LONG	src_cnt;
		ULONG	src_alpha;
		ULONG	src_gray;

		src_cnt = 0;
		src_alpha = 0;
		src_gray = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			ULONG	alpha;

			src_word = *src++;
			INTEL_SWAP16( src_word );

			src_cnt++;
			alpha = ( src_word >> 8 ) & 0x00ffL;
			src_alpha += alpha;
			src_gray += ( src_word & 0x00ffL ) * alpha;	// gray weighted with its alpha

			e += dw;
		}

		// A non-zero alpha sum implies that at least one pixel was collected, so both
		// divisions are safe here. With an alpha sum of 0 the average alpha is 0 as well;
		// skipping the division also avoids dividing by a pixel count of 0.
		dst_word = 0;

		if ( src_alpha )
		{
			dst_word = ( src_alpha / src_cnt ) << 8;
			src_gray /= src_alpha;
			dst_word |= src_gray;
		}

		INTEL_SWAP16( dst_word );
		*dst++ = (UWORD) dst_word;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (24 bits)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_ABC24( LONG e, LONG sw, LONG dw, LONG cnt,
												 LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
												 UCHAR *src, UCHAR *dst )
{
	// Reduces one line of 3-byte pixels. Each of the three bytes of an output pixel is the
	// (truncated) average of the corresponding bytes of the source pixels collected for it.
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 3;	// bit offsets -> byte offsets
	dst += dst_bit_offset >> 3;

	while ( cnt >= 0 )
	{
		LONG		src_cnt;
		ULONG	src_red;
		ULONG	src_green;
		ULONG	src_blue;

		src_cnt = 0;
		src_red = 0;
		src_green = 0;
		src_blue = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			src_cnt++;
			src_red += *src++;
			src_green += *src++;
			src_blue += *src++;

			e += dw;
		}

		// Without a collected pixel (e was not negative) there is nothing to average: the
		// sums stay 0 and neither the table lookup at index -1 nor a division by 0 happens.
		if ( src_cnt > 0 )
		{
			LONG	shifts;

			shifts = -1;
			if ( src_cnt <= 256 )	// the table covers the counts 1 - 256
				shifts = cnt_to_shifts[src_cnt - 1];

			if ( shifts >= 0 )	// can we shift?
			{
				src_red >>= shifts;
				src_green >>= shifts;
				src_blue >>= shifts;
			}
			else	// we have to divide
			{
				src_red /= src_cnt;
				src_green /= src_cnt;
				src_blue /= src_cnt;
			}
		}

		*dst++ = (UCHAR) src_red;
		*dst++ = (UCHAR) src_green;
		*dst++ = (UCHAR) src_blue;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (32 bits, 3 components of 8 bits each)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_xABC24( LONG e, LONG sw, LONG dw, LONG cnt,
													LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
													ULONG *src, ULONG *dst )
{
	// Reduces one line of 32-bit pixels with three 8-bit components (bits 23-16, 15-8 and
	// 7-0 of the word after INTEL_SWAP32). Each output pixel holds the (truncated)
	// per-component average of the source pixels collected for it; bits 31-24 of the output
	// are always 0.
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 5;	// bit offsets -> pixel offsets
	dst += dst_bit_offset >> 5;

	while ( cnt >= 0 )
	{
		ULONG	src_word;
		ULONG	dst_word;
		LONG		src_cnt;
		ULONG	src_red;
		ULONG	src_green;
		ULONG	src_blue;

		src_cnt = 0;
		src_red = 0;
		src_green = 0;
		src_blue = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			src_word = *src++;
			INTEL_SWAP32( src_word );

			src_cnt++;
			src_red += ( src_word >> 16 ) & 0x000000ffL;
			src_green += ( src_word >> 8 ) & 0x000000ffL;
			src_blue += src_word & 0x000000ffL;

			e += dw;
		}

		// Without a collected pixel (e was not negative) there is nothing to average: the
		// sums stay 0 and neither the table lookup at index -1 nor a division by 0 happens.
		if ( src_cnt > 0 )
		{
			LONG	shifts;

			shifts = -1;
			if ( src_cnt <= 256 )	// the table covers the counts 1 - 256
				shifts = cnt_to_shifts[src_cnt - 1];

			if ( shifts >= 0 )	// can we shift?
			{
				src_red >>= shifts;
				src_green >>= shifts;
				src_blue >>= shifts;
			}
			else	// we have to divide
			{
				src_red /= src_cnt;
				src_green /= src_cnt;
				src_blue /= src_cnt;
			}
		}

		dst_word = ( src_red << 16 ) | ( src_green << 8 ) | src_blue;
		INTEL_SWAP32( dst_word );
		*dst++ = dst_word;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (32 bits, 4 independent components of 8 bits each)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_ABCD32( LONG e, LONG sw, LONG dw, LONG cnt,
								  LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
								  ULONG *src, ULONG *dst )
{
	// Reduces one line of 32-bit pixels with four 8-bit components (the four bytes of the
	// word after INTEL_SWAP32). All components are averaged independently of each other
	// over the source pixels collected for an output pixel (truncating).
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 5;	// bit offsets -> pixel offsets
	dst += dst_bit_offset >> 5;

	while ( cnt >= 0 )
	{
		ULONG	src_word;
		ULONG	dst_word;
		LONG		src_cnt;
		ULONG	src_a;
		ULONG	src_b;
		ULONG	src_c;
		ULONG	src_d;

		src_cnt = 0;
		src_a = 0;
		src_b = 0;
		src_c = 0;
		src_d = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			src_word = *src++;
			INTEL_SWAP32( src_word );

			src_cnt++;
			src_a += ( src_word >> 24 ) & 0x000000ffL;
			src_b += ( src_word >> 16 ) & 0x000000ffL;
			src_c += ( src_word >> 8 ) & 0x000000ffL;
			src_d += src_word & 0x000000ffL;

			e += dw;
		}

		// Without a collected pixel (e was not negative) there is nothing to average: the
		// sums stay 0 and neither the table lookup at index -1 nor a division by 0 happens.
		if ( src_cnt > 0 )
		{
			LONG	shifts;

			shifts = -1;
			if ( src_cnt <= 256 )	// the table covers the counts 1 - 256
				shifts = cnt_to_shifts[src_cnt - 1];

			if ( shifts >= 0 )	// can we shift?
			{
				src_a >>= shifts;
				src_b >>= shifts;
				src_c >>= shifts;
				src_d >>= shifts;
			}
			else	// we have to divide
			{
				src_a /= src_cnt;
				src_b /= src_cnt;
				src_c /= src_cnt;
				src_d /= src_cnt;
			}
		}

		dst_word = ( src_a << 24 ) | ( src_b << 16 ) | ( src_c << 8 ) | src_d;
		INTEL_SWAP32( dst_word );
		*dst++ = dst_word;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}

//----------------------------------------------------------------------------------------
// Shrink a line (32 bits, 4 components of 8 bits each, first one is alpha)
// Function result:		-
// e:									negative source width = - sw (discriminator start value)
// sw:								source width (error for a step to the next destination pixel)
// dw:								destination width (error for a step to the next source pixel)
// cnt:								number of output pixels - 1
// bits:							bits per pixel
// src_bit_offset:		number of unused (left) bits in the first source word
// dst_bit_offset:		number of unused (left) bits in the first destination word
// src:								source address
// dst:								destination address
//----------------------------------------------------------------------------------------
void	shrink_line_ARGB32( LONG e, LONG sw, LONG dw, LONG cnt,
								  LONG bits, LONG src_bit_offset, LONG dst_bit_offset,
								  ULONG *src, ULONG *dst )
{
	// Reduces one line of 32-bit pixels made of alpha (bits 31-24) and three 8-bit color
	// components (bits 23-16, 15-8, 7-0 of the word after INTEL_SWAP32). The output alpha is
	// the plain average of the collected alphas; each output color component is the average
	// of the source components weighted with their alpha.
	// If the alpha sum is 0 the whole output pixel is 0.
	(void) bits;	// unused, the pixel size is fixed

	src += src_bit_offset >> 5;	// bit offsets -> pixel offsets
	dst += dst_bit_offset >> 5;

	while ( cnt >= 0 )
	{
		ULONG	src_word;
		ULONG	dst_word;
		LONG	src_cnt;
		ULONG	src_alpha;
		ULONG	src_red;
		ULONG	src_green;
		ULONG	src_blue;

		src_cnt = 0;
		src_alpha = 0;
		src_red = 0;
		src_green = 0;
		src_blue = 0;

		while ( e < 0 )	// collect source pixels for this output pixel
		{
			ULONG	alpha;

			src_word = *src++;
			INTEL_SWAP32( src_word );

			src_cnt++;
			alpha = ( src_word >> 24 ) & 0x000000ffL;
			src_alpha += alpha;
			src_red += (( src_word >> 16 ) & 0x000000ffL ) * alpha;	// colors weighted with alpha
			src_green += (( src_word >> 8 ) & 0x000000ffL ) * alpha;
			src_blue += ( src_word & 0x000000ffL ) * alpha;

			e += dw;
		}

		// A non-zero alpha sum implies that at least one pixel was collected, so all
		// divisions are safe here. With an alpha sum of 0 the average alpha is 0 as well;
		// skipping the division also avoids dividing by a pixel count of 0.
		dst_word = 0;

		if ( src_alpha )
		{
			dst_word = ( src_alpha / src_cnt ) << 24;
			src_red /= src_alpha;
			src_green /= src_alpha;
			src_blue /= src_alpha;
			dst_word |= ( src_red << 16 ) | ( src_green << 8 ) | src_blue;
		}

		INTEL_SWAP32( dst_word );
		*dst++ = dst_word;

		e -= sw;	// subtract source width from the discriminator
		cnt--;
	}
}
